# ISO3 codes of the ten countries selected for country-level comparison.
country_list <- c(
  "CHN", "DEU", "IND", "JPN", "PRK",
  "PAK", "PSE", "SDN", "SYR", "USA"
)

# Life-expectancy table (country code, country name, life expectancy);
# `year` is converted to numeric.
life_df <- mutate(
  read_csv("life_exp.csv", show_col_types = FALSE),
  year = as.numeric(year)
)

# Food-intake table: keep the rows whose varnum is below 15 and expose the
# `median` column under the name `intake`.
food_df <- read_csv("Country-level Nutritions.csv", show_col_types = FALSE)
food_df <- filter(food_df, varnum < 15)
food_df <- rename(food_df, intake = median)

# Lookup table from food code (varnum) to food name.
food_code <- read_csv("food_code.csv", show_col_types = FALSE)

# Country-code table.
# NOTE(review): presumably maps country codes to country names -- confirm
# against the CSV.
country_code <- read_csv("country_code.csv", show_col_types = FALSE)

# Build the analysis table: every food-intake row, extended with
#   1. the life_df columns for the same country code and year, and
#   2. the food_code columns for the same varnum.
# The left table is supplied by the pipe alone. Naming it again as `x =`
# would push the piped value into a later positional argument of left_join().
food_life <- food_df %>%
  left_join(life_df, by = c("iso3" = "Code", "year" = "year")) %>%
  left_join(food_code, by = c("varnum" = "varnum"))

Top Food Worldwide (by average daily intake)

# Box plot of the 2018 intake values of every food, ordered by intake and
# drawn horizontally. Only rows with age, female, urban and edu equal to 999
# are used (presumably the "all groups" code -- confirm against the data
# dictionary).
top_food_box <- ggplot(
  data = filter(
    food_life,
    year == 2018 & age == 999 & female == 999 & urban == 999 & edu == 999
  ),
  mapping = aes(x = reorder(Food, intake), y = intake)
) +
  geom_boxplot(aes(color = Food, group = Food, fill = Food), alpha = 0.7) +
  labs(
    title = "Food Intake Worldwide",
    caption = "Source:Global Dietary Database",
    y = "Average Food Daily Intake(g/d)",
    x = ""
  ) +
  theme_pander() +
  coord_flip()

# Interactive version of the box plot.
ggplotly(top_food_box)
# Mean 2018 intake per food (rounded to two decimals), largest first, using
# the rows where age, female, urban and edu are all 999.
top_food <- food_life %>%
  filter(
    year == 2018 & age == 999 & female == 999 & urban == 999 & edu == 999
  ) %>%
  group_by(Food) %>%
  summarise(intake_avg = round(mean(intake), digits = 2)) %>%
  arrange(desc(intake_avg))

# Searchable table with a filter box above every column.
datatable(top_food, filter = list(position = "top"), rownames = FALSE)

Top Food by Sex

# Mean 2018 intake of each food by sex. Keeps the rows that are split by sex
# (female != 999) while age, urban and edu stay at 999; female == 1 is
# labelled "female", every other value "male".
top_food_sex <- food_life %>%
  filter(
    year == 2018 & age == 999 & female != 999 & urban == 999 & edu == 999
  ) %>%
  mutate(sex = if_else(female == 1, "female", "male")) %>%
  group_by(Food, sex) %>%
  summarise(intake_avg = round(mean(intake), digits = 2)) %>%
  arrange(desc(intake_avg))

# Horizontal dodged bars: one bar per sex for each food, ordered by intake.
ggplot(top_food_sex) +
  aes(x = reorder(Food, intake_avg), y = intake_avg, fill = sex) +
  geom_bar(stat = "identity", position = "dodge", width = 0.5, alpha = 0.7) +
  labs(
    title = "Differences in Food Intake for Female & Male",
    caption = "Source:Global Dietary Database",
    y = "Average Food Daily Intake",
    x = ""
  ) +
  theme_pander() +
  coord_flip()

Top Food by Area

# Mean 2018 intake of each food by area. Keeps the rows that are split by
# area (urban != 999) while age, female and edu stay at 999; urban == 1 is
# labelled "urban", every other value "rural".
top_food_area <- food_life %>%
  filter(year == 2018, age == 999, female == 999, urban != 999, edu == 999) %>%
  mutate(area = if_else(urban == 1, "urban", "rural")) %>%
  group_by(Food, area) %>%
  summarise(
    # na.rm = TRUE: a single missing intake value must not turn the average
    # of the whole food/area group into NA (NA bars are dropped from the plot).
    intake_avg = round(mean(intake, na.rm = TRUE), 2),
    # Return an ungrouped table; this also silences the grouping message.
    .groups = "drop"
  ) %>%
  arrange(desc(intake_avg))

# Horizontal dodged bars: one bar per area for each food, ordered by intake.
# geom_col() is the direct form of geom_bar(stat = "identity").
ggplot(
  top_food_area,
  aes(x = reorder(Food, intake_avg), y = intake_avg, fill = area)
) +
  geom_col(position = "dodge", width = 0.5, alpha = 0.7) +
  labs(
    title = "Differences in Food Intake for Urban & Rural Areas",
    caption = "Source:Global Dietary Database",
    y = "Average Food Daily Intake",
    x = ""
  ) +
  theme_pander() +
  coord_flip()

Average Food Intake Worldwide by Year

# Mean intake of every food in every year (rounded to two decimals, largest
# first), using the rows where age, female, urban and edu are all 999.
top_year <- food_life %>%
  filter(age == 999 & female == 999 & urban == 999 & edu == 999) %>%
  group_by(year, Food) %>%
  summarise(intake_avg = round(mean(intake), digits = 2)) %>%
  arrange(desc(intake_avg))

# One line (with point markers) per food across the years.
plot1 <- ggplot(top_year) +
  aes(x = year, y = intake_avg, group = Food, color = Food) +
  geom_line(size = 1) +
  geom_point(size = 1) +
  labs(
    title = "Average Food Intake Worldwide",
    x = "Year",
    y = "Intake Avg(g/day)"
  ) +
  theme_classic()

# Interactive version of the line chart.
ggplotly(plot1)

Food intake & life expectancy in 10 selected countries

# Trends in the ten countries of `country_list`: life expectancy plus the
# intake of five food groups, one colored line per country, arranged in a
# 2 x 3 grid with a shared legend.

# Rows shared by every panel: the selected countries only, restricted to the
# rows where age, female, urban and edu are all 999. Without the country
# filter every country in the data would be drawn and listed in the legend.
selected_trend <- food_life %>%
  filter(
    iso3 %in% country_list,
    age == 999, female == 999, urban == 999, edu == 999
  )

# Intake-over-time line chart for one food group.
#   data       - rows to plot (needs varnum, year, intake, Country)
#   food_id    - varnum of the food group shown in the panel
#   food_title - panel title
# Returns a ggplot object. Line and point colors both follow Country so that
# every panel agrees with the shared legend.
plot_intake_trend <- function(data, food_id, food_title) {
  data %>%
    filter(varnum == food_id) %>%
    ggplot(aes(x = year, y = intake, group = Country, color = Country)) +
    geom_line(size = 1) +
    geom_point(size = 2) +
    labs(x = "Year", y = "Intake (g/day)", title = food_title) +
    theme_classic()
}

a <- plot_intake_trend(selected_trend, 7, "Refined grains")
b <- plot_intake_trend(selected_trend, 2, "Non-starchy vegetables")
# NOTE: the variable `c` shares its name with base::c(); calls to c() still
# resolve to the function, the name is kept so existing references work.
c <- plot_intake_trend(selected_trend, 1, "Fruits")
d <- plot_intake_trend(selected_trend, 3, "Potatoes")
e <- plot_intake_trend(selected_trend, 8, "Whole grains")

# Life expectancy over time. The table repeats each country/year once per
# food, so the repeats are collapsed before plotting.
lf <- selected_trend %>%
  distinct(Country, year, life_exp) %>%
  ggplot(aes(x = year, y = life_exp, group = Country, color = Country)) +
  geom_line(size = 1, alpha = 0.7) +
  geom_point(size = 2) +
  labs(x = "Year", y = "(year-old)", title = "Life Expectancy") +
  theme_classic()

# Combine the six panels and add an overall title.
figure <- ggarrange(
  lf, a, b, c, d, e,
  ncol = 2, nrow = 3,
  common.legend = TRUE, legend = "bottom"
)
annotate_figure(
  figure,
  top = text_grob(
    "Life Expectancy & Food Intake",
    color = "black", face = "bold", size = 14
  )
)

Food intake & life expectancy in China

life expectancy

# Life expectancy of China over the years, using the rows where age, female,
# urban and edu are all 999. The line is drawn in steelblue; the points keep
# the Country color mapping.
c_lf <- ggplot(
  data = filter(
    food_life,
    iso3 == "CHN" & age == 999 & female == 999 & urban == 999 & edu == 999
  ),
  mapping = aes(x = year, y = life_exp, group = Country, color = Country)
) +
  geom_line(size = 1, alpha = 0.7, color = "steelblue") +
  geom_point(size = 2) +
  labs(title = "Life Expectancy", x = "Year", y = "(year-old)") +
  theme_classic()
c_lf

food intake in China

# Print one intake-over-time chart for China per food code 1..14. The chart
# title is the food name looked up in food_code.
for (food_id in seq_len(14)) {
  # which() skips rows whose varnum is NA, as subset() does
  food_name <- food_code$Food[which(food_code$varnum == food_id)]

  chn_rows <- filter(
    food_life,
    iso3 == "CHN" & varnum == food_id &
      age == 999 & female == 999 & urban == 999 & edu == 999
  )

  chn_plot <- ggplot(chn_rows) +
    aes(x = year, y = intake, group = Country, color = Country) +
    geom_line(size = 1) +
    geom_point(size = 2) +
    labs(title = food_name, x = "Year", y = "Intake(g/day)") +
    theme_classic()

  print(chn_plot)
}

Average life expectancy by year

# Mean life expectancy per year over all rows of life_df. `year` is stored as
# character in life_year and converted back to numeric for the x axis.
life_year <- mutate(life_df, year = as.character(year))
life_year <- group_by(life_year, year)
life_year <- summarise(life_year, avg_life = mean(life_exp))

# Line chart with point markers of the yearly mean.
plot_life_year <- ggplot(life_year) +
  aes(x = as.numeric(year), y = avg_life) +
  geom_line(alpha = 0.7, size = 1, color = "steelblue") +
  geom_point(size = 2, alpha = 0.7) +
  labs(
    title = " Average life expectancy Worldwide",
    x = "Year",
    y = "life expectancy"
  ) +
  theme_classic()

# Interactive version of the chart.
ggplotly(plot_life_year)

Life expectancy ranking

# 2018 rows where age, female, urban and edu are all 999.
food_life2018 <- food_life %>%
  filter(age == 999, female == 999, urban == 999, edu == 999, year == 2018)

# Countries ranked by life expectancy, highest first. distinct() collapses
# the repeated Country/life_exp pairs of food_life2018 into one row each.
top_life <- food_life2018 %>%
  select(Country, life_exp) %>%
  rename(country = Country) %>%
  arrange(desc(life_exp)) %>%
  distinct()

# Horizontal bar chart of the 20 highest-ranked countries.
# head(20) selects the first 20 rows explicitly; unlike `[0:20, ]` it does
# not rely on index 0 being silently ignored and it never pads the result
# with all-NA rows when fewer than 20 countries are available.
top_life %>%
  head(20) %>%
  ggplot(aes(x = reorder(country, life_exp, na.rm = TRUE), y = life_exp)) +
  theme_classic() +
  geom_bar(
    aes(fill = life_exp),
    position = "dodge", alpha = 0.8, stat = "identity"
  ) +
  coord_flip() +
  labs(
    y = "Life expectancy",
    x = "Country",
    title = "Life Expectancy Worldwide",
    fill = "life expectancy",
    caption = "Data from the World Bank"
  )

relationship between food intake & life expectancy

# Print one scatter plot per food code 1..14: 2018 intake against life
# expectancy with a fitted straight line. The title is the food name looked
# up in food_code.
for (food_id in seq_len(14)) {
  # which() skips rows whose varnum is NA, as subset() does
  food_name <- food_code$Food[which(food_code$varnum == food_id)]

  food_rows <- filter(food_life2018, varnum == food_id)

  life_scatter <- ggplot(food_rows) +
    aes(x = intake, y = life_exp) +
    geom_point(size = 1) +
    geom_smooth(method = "lm", formula = y ~ x) +
    labs(title = food_name, x = "intake(g/d)", y = "Life Expectancy") +
    theme_classic()

  print(life_scatter)
}
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 4 rows containing missing values (geom_point).

## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Removed 4 rows containing missing values (geom_point).

## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Removed 4 rows containing missing values (geom_point).

## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Removed 4 rows containing missing values (geom_point).

## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Removed 4 rows containing missing values (geom_point).

## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Removed 4 rows containing missing values (geom_point).

## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Removed 4 rows containing missing values (geom_point).

## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Removed 4 rows containing missing values (geom_point).

## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Removed 4 rows containing missing values (geom_point).

## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Removed 4 rows containing missing values (geom_point).

## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Removed 4 rows containing missing values (geom_point).

## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Removed 4 rows containing missing values (geom_point).

## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Removed 4 rows containing missing values (geom_point).

# Scatter plots of 2018 intake against life expectancy for five food groups,
# arranged in a 2 x 3 grid under a common title.

# Scatter plot with a fitted straight line for one food group.
#   data       - rows to plot (needs varnum, intake, life_exp)
#   food_id    - varnum of the food group shown in the panel
#   food_title - panel title
# Returns a ggplot object. Rows with a missing intake or life expectancy are
# dropped up front, so the omission is explicit instead of being reported as
# a pair of ggplot warnings for every panel.
plot_life_vs_intake <- function(data, food_id, food_title) {
  data %>%
    filter(varnum == food_id, !is.na(intake), !is.na(life_exp)) %>%
    ggplot(aes(x = intake, y = life_exp)) +
    geom_point(size = 1) +
    geom_smooth(method = "lm", formula = y ~ x) +
    labs(x = "intake", y = "Life expectancy", title = food_title) +
    theme_classic()
}

p7 <- plot_life_vs_intake(food_life2018, 7, "Refined grains")
p2 <- plot_life_vs_intake(food_life2018, 2, "Non-starchy vegetables")
p1 <- plot_life_vs_intake(food_life2018, 1, "Fruits")
p3 <- plot_life_vs_intake(food_life2018, 3, "Potatoes")
p8 <- plot_life_vs_intake(food_life2018, 8, "Whole grains")

# Combine the five panels and add an overall title.
figure1 <- ggarrange(p7, p2, p1, p3, p8, ncol = 2, nrow = 3)
annotate_figure(
  figure1,
  top = text_grob(
    "Life Expectancy & Food Intake",
    color = "red", face = "bold", size = 14
  )
)

top countries by cancer rate

# Countries ranked by cancer rate, highest first. distinct() collapses the
# repeated Country/cancer_rate pairs of food_life2018 into one row each.
top_cancer <- food_life2018 %>%
  select(Country, cancer_rate) %>%
  arrange(desc(cancer_rate)) %>%
  rename(country = Country) %>%
  distinct()

# Horizontal bar chart of the 20 highest-ranked countries.
# head(20) selects the first 20 rows explicitly; unlike `[0:20, ]` it does
# not rely on index 0 being silently ignored and it never pads the result
# with all-NA rows when fewer than 20 countries are available.
top_cancer %>%
  head(20) %>%
  ggplot(aes(x = reorder(country, cancer_rate, na.rm = TRUE), y = cancer_rate)) +
  theme_classic() +
  geom_bar(
    aes(fill = cancer_rate),
    position = "dodge", alpha = 0.8, stat = "identity"
  ) +
  coord_flip() +
  labs(
    y = "cancer rate",
    x = "Country",
    title = "Global Cancer Incidence",
    subtitle = "cancer rate = Num of incidence/100,000",
    fill = "cancer rate",
    caption = "Data from World Cancer Research Fund International"
  )

Relationship between food intake & cancer rate

Each food group & cancer rate

# Print one scatter plot per food code 1..14: 2018 intake against cancer rate
# with a fitted straight line. The title is the food name looked up in
# food_code.
for (food_id in seq_len(14)) {
  # which() skips rows whose varnum is NA, as subset() does
  food_name <- food_code$Food[which(food_code$varnum == food_id)]

  food_rows <- filter(food_life2018, varnum == food_id)

  cancer_scatter <- ggplot(food_rows) +
    aes(x = intake, y = cancer_rate) +
    geom_point(size = 1) +
    geom_smooth(method = "lm", formula = y ~ x) +
    labs(title = food_name, x = "intake", y = "Cancer Rate") +
    theme_classic()

  print(cancer_scatter)
}
## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Warning: Removed 30 rows containing missing values (geom_point).

## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Removed 30 rows containing missing values (geom_point).

## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Removed 30 rows containing missing values (geom_point).

## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Removed 30 rows containing missing values (geom_point).

## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Removed 30 rows containing missing values (geom_point).

## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Removed 30 rows containing missing values (geom_point).

## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Removed 30 rows containing missing values (geom_point).

## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Removed 30 rows containing missing values (geom_point).

## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Removed 30 rows containing missing values (geom_point).

## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Removed 30 rows containing missing values (geom_point).

## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Removed 30 rows containing missing values (geom_point).

## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Removed 30 rows containing missing values (geom_point).

## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Removed 30 rows containing missing values (geom_point).

# Scatter plots of 2018 intake against cancer rate for six food groups,
# arranged in a 2 x 3 grid under a common title.

# Scatter plot with a fitted straight line for one food group.
#   data       - rows to plot (needs varnum, intake, cancer_rate)
#   food_id    - varnum of the food group shown in the panel
#   food_title - panel title
# Returns a ggplot object. Rows with a missing intake or cancer rate are
# dropped up front, so the omission is explicit instead of being reported as
# a pair of ggplot warnings for every panel.
plot_cancer_vs_intake <- function(data, food_id, food_title) {
  data %>%
    filter(varnum == food_id, !is.na(intake), !is.na(cancer_rate)) %>%
    ggplot(aes(x = intake, y = cancer_rate)) +
    geom_point(size = 1) +
    geom_smooth(method = "lm", formula = y ~ x) +
    labs(x = "intake", y = "Cancer Rate", title = food_title) +
    theme_classic()
}

cancer7 <- plot_cancer_vs_intake(food_life2018, 7, "Refined grains")
cancer2 <- plot_cancer_vs_intake(food_life2018, 2, "Non-starchy vegetables")
cancer1 <- plot_cancer_vs_intake(food_life2018, 1, "Fruits")
cancer3 <- plot_cancer_vs_intake(food_life2018, 3, "Potatoes")
cancer11 <- plot_cancer_vs_intake(food_life2018, 11, "Seafoods")
cancer5 <- plot_cancer_vs_intake(food_life2018, 5, "Beans & legumes")

# Combine the six panels and add an overall title.
figure1 <- ggarrange(
  cancer7, cancer2, cancer1, cancer3, cancer11, cancer5,
  ncol = 2, nrow = 3
)
annotate_figure(
  figure1,
  top = text_grob(
    "Cancer Rate & Food Intake",
    color = "red", face = "bold", size = 14
  )
)

basic information of food intake

# Browsable intake table: one row per country, year and food, taken from the
# rows where age, female, urban and edu are all 999. `superregion2` is shown
# under the name `continent`; intake is rounded to two decimals.
basic_intake <- food_life %>%
  filter(age == 999 & female == 999 & urban == 999 & edu == 999) %>%
  select(continent = superregion2, year, Country, Food, intake) %>%
  mutate(intake = round(intake, digits = 2)) %>%
  arrange(Country)

# Searchable table with a filter box above every column.
datatable(basic_intake, filter = list(position = "top"), rownames = FALSE)

basic information of life expectancy & cancer rate

# Browsable table of life expectancy and cancer rate per country and year,
# taken from the rows where age, female, urban and edu are all 999.
# `superregion2` is shown under the name `continent`; both measures are
# rounded to two decimals.
basic_life <- food_life %>%
  filter(age == 999, female == 999, urban == 999, edu == 999) %>%
  rename(continent = superregion2) %>%
  mutate(
    life_exp = round(life_exp, 2),
    cancer_rate = round(cancer_rate, 2)
  ) %>%
  select(continent, year, Country, life_exp, cancer_rate) %>%
  # food_life repeats every country/year once per food; with the food columns
  # dropped those rows are exact duplicates, so keep one of each.
  distinct() %>%
  arrange(Country)

# Searchable table with a filter box above every column.
datatable(basic_life, filter = list(position = "top"), rownames = FALSE)